import numpy as np
import pandas as pd
# Load FashionDataset.csv (looked up relative to the working directory) into a DataFrame.
A = pd.read_csv("FashionDataset.csv")
# Bare expression: it only renders the frame when run as a notebook cell.
A
Unnamed: 0 | BrandName | Deatils | Sizes | MRP | SellPrice | Discount | Category | |
---|---|---|---|---|---|---|---|---|
0 | 0 | life | solid cotton blend collar neck womens a-line d... | Size:Large,Medium,Small,X-Large,X-Small | Rs\n1699 | 849 | 50% off | Westernwear-Women |
1 | 1 | only | polyester peter pan collar womens blouson dres... | Size:34,36,38,40 | Rs\n3499 | 2449 | 30% off | Westernwear-Women |
2 | 2 | fratini | solid polyester blend wide neck womens regular... | Size:Large,X-Large,XX-Large | Rs\n1199 | 599 | 50% off | Westernwear-Women |
3 | 3 | zink london | stripes polyester sweetheart neck womens dress... | Size:Large,Medium,Small,X-Large | Rs\n2299 | 1379 | 40% off | Westernwear-Women |
4 | 4 | life | regular fit regular length denim womens jeans ... | Size:26,28,30,32,34,36 | Rs\n1699 | 849 | 50% off | Westernwear-Women |
... | ... | ... | ... | ... | ... | ... | ... | ... |
30753 | 21 | swarovski | crystal stylish womens rodhium earrings | Nan | Nan | 8950 | Nan | Jewellery-Women |
30754 | 22 | Nan | Nan | Nan | Nan | Nan | Nan | Jewellery-Women |
30755 | 23 | jewelz | ethnic gold plated jhumki earrings | Nan | Rs\n1839 | 643 | 65% off | Jewellery-Women |
30756 | 24 | estelle | womens gold plated double line fancy white and... | Nan | Nan | 2799 | Nan | Jewellery-Women |
30757 | 25 | estelle | womens gold plated bridge designer mangalsutra... | Nan | Nan | 1899 | Nan | Jewellery-Women |
30758 rows × 8 columns
# Print each column's dtype and non-null count, plus the frame's memory usage.
A.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 30758 entries, 0 to 30757 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 30758 non-null int64 1 BrandName 30758 non-null object 2 Deatils 30758 non-null object 3 Sizes 30758 non-null object 4 MRP 30758 non-null object 5 SellPrice 30758 non-null object 6 Discount 30758 non-null object 7 Category 30758 non-null object dtypes: int64(1), object(7) memory usage: 1.9+ MB
# Count missing values per column. The raw file marks missing fields with the
# literal string "Nan", which isna() does not recognise, so map it to NaN on a
# temporary copy first; A itself is not modified by this line.
A.replace("Nan", np.nan).isna().sum()
# Discard the "Unnamed: 0" column, the unnamed leading column of the CSV.
A = A.drop(columns=["Unnamed: 0"])
# (rows, columns) after the drop.
A.shape
(30758, 7)
# Show the frame after the column drop (rendered only when run as a notebook cell).
A
BrandName | Deatils | Sizes | MRP | SellPrice | Discount | Category | |
---|---|---|---|---|---|---|---|
0 | life | solid cotton blend collar neck womens a-line d... | Size:Large,Medium,Small,X-Large,X-Small | Rs\n1699 | 849 | 50% off | Westernwear-Women |
1 | only | polyester peter pan collar womens blouson dres... | Size:34,36,38,40 | Rs\n3499 | 2449 | 30% off | Westernwear-Women |
2 | fratini | solid polyester blend wide neck womens regular... | Size:Large,X-Large,XX-Large | Rs\n1199 | 599 | 50% off | Westernwear-Women |
3 | zink london | stripes polyester sweetheart neck womens dress... | Size:Large,Medium,Small,X-Large | Rs\n2299 | 1379 | 40% off | Westernwear-Women |
4 | life | regular fit regular length denim womens jeans ... | Size:26,28,30,32,34,36 | Rs\n1699 | 849 | 50% off | Westernwear-Women |
... | ... | ... | ... | ... | ... | ... | ... |
30753 | swarovski | crystal stylish womens rodhium earrings | Nan | Nan | 8950 | Nan | Jewellery-Women |
30754 | Nan | Nan | Nan | Nan | Nan | Nan | Jewellery-Women |
30755 | jewelz | ethnic gold plated jhumki earrings | Nan | Rs\n1839 | 643 | 65% off | Jewellery-Women |
30756 | estelle | womens gold plated double line fancy white and... | Nan | Nan | 2799 | Nan | Jewellery-Women |
30757 | estelle | womens gold plated bridge designer mangalsutra... | Nan | Nan | 1899 | Nan | Jewellery-Women |
30758 rows × 7 columns
# Number of distinct values in each column.
A.nunique()
BrandName 275 Deatils 23877 Sizes 1172 MRP 1097 SellPrice 2046 Discount 66 Category 7 dtype: int64
# Turn the literal "Nan" placeholder into a real missing value, then discard
# every row that has at least one missing field.
A = A.replace("Nan", np.nan).dropna(axis=0)
# Show the cleaned frame (rendered only when run as a notebook cell).
A
BrandName | Deatils | Sizes | MRP | SellPrice | Discount | Category | |
---|---|---|---|---|---|---|---|
0 | life | solid cotton blend collar neck womens a-line d... | Size:Large,Medium,Small,X-Large,X-Small | Rs\n1699 | 849 | 50% off | Westernwear-Women |
1 | only | polyester peter pan collar womens blouson dres... | Size:34,36,38,40 | Rs\n3499 | 2449 | 30% off | Westernwear-Women |
2 | fratini | solid polyester blend wide neck womens regular... | Size:Large,X-Large,XX-Large | Rs\n1199 | 599 | 50% off | Westernwear-Women |
3 | zink london | stripes polyester sweetheart neck womens dress... | Size:Large,Medium,Small,X-Large | Rs\n2299 | 1379 | 40% off | Westernwear-Women |
4 | life | regular fit regular length denim womens jeans ... | Size:26,28,30,32,34,36 | Rs\n1699 | 849 | 50% off | Westernwear-Women |
... | ... | ... | ... | ... | ... | ... | ... |
26673 | lemon & pepper | womens casual wear buckle closure flats - navy | Size:36,37,38,39,40 | Rs\n2999 | 1499 | 50% off | Footwear-Women |
26674 | haute curry | womens casual wear slip on heels - black | Size:36,37,38,39,40 | Rs\n2199 | 1099 | 50% off | Footwear-Women |
26885 | swiss eagle | womens analogue metallic watch | Size:Error Size | Rs\n13990 | 4197 | 70% off | Watches-Women |
27290 | lawman watches | womens rose gold dial stainless steel analogue... | Size:Error Size | Rs\n7499 | 4999 | 33% off | Watches-Women |
28418 | lawman watches | womens silver dial stainless steel analogue wa... | Size:Error Size | Rs\n5999 | 3999 | 33% off | Watches-Women |
18374 rows × 7 columns
from re import sub  # no longer used in this cell; retained in case other code calls sub()

# MRP is stored as text such as "Rs<newline>1699": strip the "R", "s" and
# newline characters in one vectorised pass, then parse what is left as a number.
A["MRP"] = pd.to_numeric(A["MRP"].str.replace(r"[Rs\n]", "", regex=True))
# SellPrice is text too, but needs no stripping before it is parsed.
A["SellPrice"] = pd.to_numeric(A["SellPrice"])
# Print the dtypes again to confirm that both price columns are numeric now.
A.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 18374 entries, 0 to 28418 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 BrandName 18374 non-null object 1 Deatils 18374 non-null object 2 Sizes 18374 non-null object 3 MRP 18374 non-null int64 4 SellPrice 18374 non-null int64 5 Discount 18374 non-null object 6 Category 18374 non-null object dtypes: int64(2), object(5) memory usage: 1.1+ MB
# Preview the first five rows after the price conversion.
A.head()
BrandName | Deatils | Sizes | MRP | SellPrice | Discount | Category | |
---|---|---|---|---|---|---|---|
0 | life | solid cotton blend collar neck womens a-line d... | Size:Large,Medium,Small,X-Large,X-Small | 1699 | 849 | 50% off | Westernwear-Women |
1 | only | polyester peter pan collar womens blouson dres... | Size:34,36,38,40 | 3499 | 2449 | 30% off | Westernwear-Women |
2 | fratini | solid polyester blend wide neck womens regular... | Size:Large,X-Large,XX-Large | 1199 | 599 | 50% off | Westernwear-Women |
3 | zink london | stripes polyester sweetheart neck womens dress... | Size:Large,Medium,Small,X-Large | 2299 | 1379 | 40% off | Westernwear-Women |
4 | life | regular fit regular length denim womens jeans ... | Size:26,28,30,32,34,36 | 1699 | 849 | 50% off | Westernwear-Women |
import seaborn as sb
# Distribution of MRP: density-scaled histogram with a KDE curve overlaid.
# histplot is the replacement for distplot, which seaborn has deprecated
# since 0.11 and scheduled for removal.
sb.histplot(x=A["MRP"], kde=True, stat="density")
<AxesSubplot:xlabel='MRP', ylabel='Density'>
import seaborn as sb
# Distribution of SellPrice: density-scaled histogram with a KDE curve overlaid.
# histplot is the replacement for distplot, which seaborn has deprecated
# since 0.11 and scheduled for removal.
sb.histplot(x=A["SellPrice"], kde=True, stat="density")
<AxesSubplot:xlabel='SellPrice', ylabel='Density'>
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
A.describe()
MRP | SellPrice | |
---|---|---|
count | 18374.000000 | 18374.000000 |
mean | 2136.928704 | 1163.798846 |
std | 1189.416850 | 744.201506 |
min | 171.000000 | 114.000000 |
25% | 1299.000000 | 659.000000 |
50% | 1899.000000 | 995.000000 |
75% | 2663.000000 | 1469.000000 |
max | 16999.000000 | 13599.000000 |
import matplotlib.pyplot as plt
import seaborn as sb
# Number of rows in each category, drawn on a 15x10 inch figure.
plt.figure(figsize=(15, 10))
plt.subplot(1, 1, 1)
# The column is passed as x=...: seaborn 0.12+ no longer accepts the plotted
# variable as a positional argument.
sb.countplot(x=A["Category"])
<AxesSubplot:xlabel='Category', ylabel='count'>
# Pie chart of the 25 most frequent brand names.
(
    A["BrandName"]
    .value_counts()
    .head(25)
    .plot.pie()
)
<AxesSubplot:ylabel='BrandName'>
# Bar chart of the 30 most frequent values of the Sizes column.
(
    A["Sizes"]
    .value_counts()
    .head(30)
    .plot.bar()
)
<AxesSubplot:>
# MRP against SellPrice, one point per row, coloured by category.
# x and y are passed by keyword: seaborn 0.12+ rejects positional data vectors.
sb.scatterplot(x=A["SellPrice"], y=A["MRP"], hue=A["Category"])
plt.xlabel("SellPrice")
plt.ylabel("MRP")
# Place x-axis ticks every 3000 units (0, 3000, ..., 12000).
plt.xticks(range(0, 14000, 3000))
([<matplotlib.axis.XTick at 0x1b9808f1160>, <matplotlib.axis.XTick at 0x1b9808f1130>, <matplotlib.axis.XTick at 0x1b98091aa00>, <matplotlib.axis.XTick at 0x1b98092c7c0>, <matplotlib.axis.XTick at 0x1b98092c760>], [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
# Box plot comparing the distribution of SellPrice across categories.
plt.figure(figsize=(20, 10))
# x and y are passed by keyword: seaborn 0.12+ rejects positional data vectors.
sb.boxplot(x=A["Category"], y=A["SellPrice"])
<AxesSubplot:xlabel='Category', ylabel='SellPrice'>
# Box plot comparing the distribution of MRP across categories.
plt.figure(figsize=(20, 10))
# x and y are passed by keyword: seaborn 0.12+ rejects positional data vectors.
sb.boxplot(x=A["Category"], y=A["MRP"])
<AxesSubplot:xlabel='Category', ylabel='MRP'>
# Heatmap of the pairwise correlations between the numeric columns.
# The numeric columns are selected explicitly because DataFrame.corr() in
# pandas 2.0+ no longer skips non-numeric columns and raises on them instead.
sb.heatmap(A.select_dtypes(include="number").corr())
<AxesSubplot:>